################################################################################-
# Replication File for Wratil, Wäckerle and Proksch: Government Rhetoric and the 
# Representation of Public Opinion in International Negotiations
#
# This script runs the main STM model in the paper, as well as the estimateEffect 
# function. It then creates regression results for the topics of interest.
# Four parts of this script take significant amount of time:
# ... The main STM (about 10 minutes)
# ... The adapted estimateEffect function (about 70 minutes)
# ... The main regression table (about 105 minutes)
# ... The interaction effect significance calculation (about 100 minutes)
# These calculations can be skipped by leaving the settings below at "no".
# If you want to include these steps, switch the respective settings to "yes".
#
# Make sure you have reshape2 installed.
#
# Additionally, the script produces the following graphs and tables:
# Figure 2
# Figure 3
# Figure 4
# Figure I1
# Figure I2
# Figure I3
# Figure I4
# Figure I6
# Figure I7
# Figure I8
# Figure K4
# Table A2
# Table A3
# Table I2
# Table I3
# Table I4
# Table I5
# Table I6
# Table I7
################################################################################-

#### Set Up and Load Data ####

source("99_functions.R")
library(tidyverse)  #version 1.3.2
library(quanteda)   #version 3.2.1
library(stm)        #version 1.3.6
library(tidytext)   #version 0.3.3
library(cowplot)    #version 1.1.1
library(ggforce)    #version 0.3.3
library(reshape)    #version 0.8.9

load("generated_data/dfm_for_stm.RData")

#### Settings ####
run_stm                     <- "no"
run_estimate_effect         <- "no"
run_regressions             <- "no"
run_regression_interactions <- "no"

#################-
#### Main STM ####
#################-

data_dfm_main <- data_dfm
docvars(data_dfm_main) <- docvars(data_dfm_main) %>% 
  data.frame() %>% 
  select(gov_eu_supporter,image_lag6m_scaled,date_correct,Transcription,
         part_of_speech,text_copy,text_original,Actor,
         gov_lr_cmp_static_scaled,eu_receipts_gdp_scaled,budget_any,unanimity_any,
         unemployment_scaled,inflation_scaled,north_south,
         Negotiation_Stage,Council_Config_final)
data_dfm_main <- dfm_subset(data_dfm_main,complete.cases(docvars(data_dfm_main)))

data.sum <- docvars(data_dfm_main)

data.sum %>% 
  filter(!is.na(gov_eu_supporter)) %>% 
  group_by(gov_eu_supporter) %>% 
  summarise(number = n(),
            share = n()/nrow(data.sum))

table(data.sum$Council_Config_final)
table(data.sum$north_south)
table(data.sum$Negotiation_Stage)
table(data.sum$budget_any)

data.sum %>% 
  filter(!duplicated(Transcription)) %>% 
  select(budget_any) %>% 
  table()

data.sum %>% 
  filter(!duplicated(Transcription)) %>% 
  select(Negotiation_Stage) %>% 
  table()

data.sum %>% 
  filter(!duplicated(Transcription)) %>% 
  select(unanimity_any) %>% 
  table() %>% 
  prop.table()

data.sum$ntoken_original <- ntoken(data.sum$text_original)

  out <- data.sum %>% 
    group_by(Council_Config_final) %>% 
    summarise(num_debates = n_distinct(Transcription),
              part_govs = n()/3,
              tokens = sum(ntoken_original)) %>% 
    mutate(avg_participation = part_govs/num_debates,
           avg_tokens = tokens/part_govs) %>% 
  select(-tokens) %>% 
  bind_rows(
    data.sum %>% 
      summarise(num_debates = n_distinct(Transcription),
                part_govs = n()/3,
                tokens = sum(ntoken_original)) %>% 
      mutate(avg_participation = part_govs/num_debates,
             avg_tokens = tokens/part_govs) %>% 
      select(-tokens) %>% 
      add_column(Council_Config_final = "TOTAL",.before = 1)
    
  ) %>% 
    mutate(part_govs = part_govs*3) %>% 
    dplyr::rename("Total Number of Debates" = "num_debates",
                  "Total Number of Speech Parts" = "part_govs",
                  "Average Number of Participating Govs" = "avg_participation",
                  "Average Speech Length" = "avg_tokens",
                  "Council Configuration" = "Council_Config_final"
    )

# This is Table A2
xtable::xtable(out) %>% print(include.rownames=FALSE)

out <- data.sum %>% 
  group_by(Council_Config_final) %>% 
  summarise(first_deb = min(date_correct),
            last_deb = max(date_correct)) %>% 
  dplyr::rename("Council Configuration" = "Council_Config_final",
                "First Debate" = "first_deb",
                "Last Debate" = "last_deb"
  ) %>% 
  bind_rows(
    data.sum %>% 
      summarise(first_deb = min(date_correct),
                last_deb = max(date_correct)) %>% 
      dplyr::rename("First Debate" = "first_deb",
                    "Last Debate" = "last_deb"
      ) %>% 
      add_column(`Council Configuration` = "TOTAL",.before = 1)
    
  ) 

out$`First Debate` <- as.character(out$`First Debate`)
out$`Last Debate` <- as.character(out$`Last Debate`)

# This is Table A3
xtable::xtable(out) %>% print(include.rownames=FALSE)

####  ...Run STM ####

if(run_stm == "yes"){
  
  start.time <- Sys.time()
  set.seed(1711)
  stm_out_40_main_model <- stm(documents = data_dfm_main,K=40,
                               prevalence=~ gov_eu_supporter*image_lag6m_scaled+
                                 part_of_speech+
                                 gov_lr_cmp_static_scaled+eu_receipts_gdp_scaled+budget_any+unanimity_any+
                                 unemployment_scaled+inflation_scaled+north_south+
                                 Negotiation_Stage+Council_Config_final,
                               data = docvars(data_dfm_main),
                               init.type = "Spectral")
  
  end.time <- Sys.time()
  
  end.time-start.time
  
  save(file="generated_data/stm_out_40_main_model.RData",stm_out_40_main_model)
}else{
  load(file="generated_data/stm_out_40_main_model.RData") 
  print("Loaded saved STM")
}

#### ...Figure 2: Procedural Topics Plot ####

data.sum <- docvars(data_dfm_main)

td_beta <- tidy(stm_out_40_main_model)

td_gamma <- tidy(stm_out_40_main_model, matrix = "gamma",
                 document_names = rownames(data_dfm_main))

top_terms <- td_beta %>%
  arrange(beta) %>%
  group_by(topic) %>%
  top_n(5, beta) %>%
  arrange(-beta) %>%
  select(topic, term) %>%
  summarise(terms = list(term)) %>%
  mutate(terms = map(terms, paste, collapse = ", ")) %>% 
  unnest(cols = c(terms))

gamma_terms <- td_gamma %>%
  group_by(topic) %>%
  summarise(gamma = mean(gamma)) %>%
  arrange(desc(gamma)) %>%
  left_join(top_terms, by = "topic") %>%
  mutate(topic = paste0("Topic ", topic),
         topic = reorder(topic, gamma))

gamma_terms <- gamma_terms %>% 
  mutate(Type = case_when(
    topic %in% c("Topic 1","Topic 5","Topic 6",
                 "Topic 8","Topic 12",
                 "Topic 13","Topic 18",
                 "Topic 22","Topic 23","Topic 24","Topic 25",
                 "Topic 32","Topic 35",
                 "Topic 36","Topic 37","Topic 38",
                 "Topic 39","Topic 40") ~ "Procedural Topic",
    TRUE ~ "Substantial Topic"
    
  ))
gamma_terms <- gamma_terms %>% 
  mutate(sector = case_when(
    topic %in% c("Topic 3","Topic 20","Topic 21","Topic 30")  ~ "Employment, Social Policy, Health and Consumer Affairs",
    topic %in% c("Topic 4","Topic 10","Topic 11","Topic 14",
                 "Topic 15","Topic 26")                       ~ "Environment",
    topic %in% c("Topic 7","Topic 16","Topic 24","Topic 29")  ~ "Justice and Home Affairs",
    topic %in% c("Topic 2","Topic 9","Topic 17","Topic 19",
                 "Topic 27","Topic 28","Topic 31")            ~ "Economic and Financial Affairs",
    topic %in% c("Topic 33","Topic 34")                       ~ "Competitiveness",
    
    
    TRUE ~ "Other"
  ))
gamma_terms$topic_name <- gamma_terms$topic %>% 
  recode("Topic 1"  = "Thanking I (rather specific person)",
         "Topic 2"  = "Exchange of information",
         "Topic 3"  = "Internal market",
         "Topic 4"  = "Ships",
         "Topic 5"  = "Reflection",
         "Topic 6"  = "Thanking II (rather abstract groups)",
         "Topic 7"  = "European judicial matters / \nEuropean public prosecutor",
         "Topic 8"  = "Referring to Commissioner",
         "Topic 9"  = "Banking supervision",
         "Topic 10" = "Burdens of implementation \n(esp. environment)",
         "Topic 11" = "Renewable energy and climate",
         "Topic 12" = "Delaying agreement",
         "Topic 13" = "Formulating a demand",
         "Topic 14" = "Invasive species",
         "Topic 15" = "GMOs",
         "Topic 16" = "Crime",
         "Topic 17" = "Budget",
         "Topic 18" = "Talking about legal text",
         "Topic 19" = "International money laundering",
         "Topic 20" = "Governance of four freedoms \n(e.g. capital, workers)",
         "Topic 21" = "Health and medical devices",
         "Topic 22" = "Supporting the compromise",
         "Topic 23" = "Congratulating",
         "Topic 24" = "Legal harmonization",
         "Topic 25" = "Cooperation between member states",
         "Topic 26" = "Air emissions and pollutants",
         "Topic 27" = "Tax evasion and fraud",
         "Topic 28" = "Banking union (esp. resolution \nand deposit insurance)",
         "Topic 29" = "Data protection",
         "Topic 30" = "Public procurement and tobacco",
         "Topic 31" = "Financial crisis",
         "Topic 32" = "More technical-level discussion needed",
         "Topic 33" = "Research and development \n(e.g. Horizon 2020)",
         "Topic 34" = "Audit and control",
         "Topic 35" = "Talking about reaching compromise",
         "Topic 36" = "Negotiations with EP",
         "Topic 37" = "Brief intervention",
         "Topic 38" = "Cautious language",
         "Topic 39" = "Affirmation",
         "Topic 40" = "Raising a concern"
  )

gamma_terms$terms <- gsub("_","\\_",gamma_terms$terms)

main_model_all_words <- stm::labelTopics(stm_out_40_main_model,n = 5)
main_model_frex_words <- main_model_all_words[["frex"]]

main_model_frex <- data.frame(topic = paste("Topic",1:40),
                              frex_terms=NA)
for(i in 1:nrow(main_model_frex)){
  main_model_frex$frex_terms[i] <- paste(main_model_frex_words[i,1:5],collapse=", ")
}

gamma_terms <- gamma_terms %>% left_join(main_model_frex)

proc.plot <- gamma_terms %>%
  filter(Type=="Procedural Topic") %>% 
  ggplot(aes(reorder(topic_name,gamma), gamma, label = frex_terms)) +
  geom_col(show.legend = FALSE,width = .8) +
  geom_text(hjust = 0, nudge_y = 0.0005,size=2.5) +
  coord_flip() +
  scale_y_continuous(expand = c(0,0),
                     limits = c(0, 0.23)) +
  theme_bw()+
  labs(x = NULL, y = "Expected Topic Proportion")+
  theme(axis.text=element_text(colour="black"),
        axis.title = element_text(size=8,colour="black"),
        axis.ticks.y = element_blank())

# This plots Figure 2
proc.plot
ggsave("figures_main_paper/figure_2.eps", width = 6, height = 3.5, units = "in")
  
tiff("figures_main_paper/figure_2.tiff", units="in", width=6, height=3.5, res=1000)
proc.plot
endoffile <- dev.off() 

#### ...Figure 3: Substantial Topics Plot ####

gamma_terms <- gamma_terms %>% 
  separate(frex_terms,into=c("term1","term2","term3","term4","term5"),sep=", ",remove=FALSE) %>% 
  mutate(frex_terms_new = paste0(term1,", ",term2,", ",term3,",\n",term4,", ",term5))

substan.plot <- gamma_terms %>%
  filter(Type=="Substantial Topic") %>% 
  ggplot(aes(reorder(topic_name,gamma), gamma, label = frex_terms)) +
  geom_col(width=0.8)+
  geom_text(hjust = 0, nudge_y = 0.0005,size=2.5) +
  coord_flip() +
  scale_y_continuous(expand = c(0,0),
                     limits = c(0, 0.11)) +
  theme_bw()+
  labs(x = NULL, y = "Expected Topic Proportion")+
  ggforce::facet_col(facets = vars(sector), 
                     scales = "free_y", 
                     space = "free")+
  scale_color_manual(values = c("black")) +
  theme(axis.text=element_text(colour="black"),
        strip.text = element_text(angle = 0, face="bold"),
        strip.background = element_blank(),
        axis.title = element_text(size=8,colour="black"),
        axis.ticks.y = element_blank(),
        panel.background = element_blank(), 
        panel.spacing.x = unit(0,"line"))
substan.plot

# This plots Figure 3
substan.plot
ggsave("figures_main_paper/figure_3.eps", width = 6, height = 7, units = "in")
  
tiff("figures_main_paper/figure_3.tiff", units="in", width=6, height=7, res=1000)
substan.plot
endoffile <- dev.off() 
  
#### ...Main Analysis (estimateEffect) ####

# estimateEffect_cluster function below takes around 70 minutes.

if(run_estimate_effect == "yes"){
  set.seed(1711)
  prep_council_40_save <- estimateEffect_cluster(c(12,13,22,32,38,40) ~ gov_eu_supporter*image_lag6m_scaled+
                                                   part_of_speech+
                                                   gov_lr_cmp_static_scaled+eu_receipts_gdp_scaled+budget_any+unanimity_any+
                                                   unemployment_scaled+inflation_scaled+north_south+
                                                   Negotiation_Stage+Council_Config_final,
                                                 stm_out_40_main_model,nsims = 10000,
                                                 meta = docvars(data_dfm_main), uncertainty = "Global")
  

  save(file="generated_data/prep_council_40.RData",prep_council_40_save)
}else{
  print("Loading saved estimateEffect")
  load(file="generated_data/prep_council_40.RData")  
}

#### ...Regression Tables ####

if(run_regressions == "yes"){
  mod_top_12 <- summary.estimateEffect_comb(prep_council_40_save,topics = 12)[[1]]
  mod_top_13 <- summary.estimateEffect_comb(prep_council_40_save,topics = 13)[[1]]
  mod_top_22 <- summary.estimateEffect_comb(prep_council_40_save,topics = 22)[[1]]
  mod_top_32 <- summary.estimateEffect_comb(prep_council_40_save,topics = 32)[[1]]
  mod_top_38 <- summary.estimateEffect_comb(prep_council_40_save,topics = 38)[[1]]
  mod_top_40 <- summary.estimateEffect_comb(prep_council_40_save,topics = 40)[[1]]

  save(file="generated_data/regression_results_main.RData",mod_top_12,
       mod_top_13,mod_top_22,mod_top_32,mod_top_38,mod_top_40)
}else{
  print("Loading saved regressions")
  load(file="generated_data/regression_results_main.RData")  
}

data.sum$fake_theta <- runif(n = nrow(data.sum),min = 0,max = 1)
m1 <- lm(fake_theta ~ gov_eu_supporter*image_lag6m_scaled+
                  part_of_speech+
                  gov_lr_cmp_static_scaled+eu_receipts_gdp_scaled+budget_any+unanimity_any+
                  unemployment_scaled+inflation_scaled+north_south+
                  Negotiation_Stage+Council_Config_final,
                data = data.sum)

# This is Table B2
stargazer::stargazer(m1,m1,m1,m1,m1,m1,
                     #  type = "text",
                     dep.var.labels   = "Topic Prevalence",
                     column.labels = c("Topic 12","Topic 13","Topic 22",
                                       "Topic 32","Topic 38","Topic 40"),
                     covariate.labels = c("Intercept","Eurosceptic Government", "Public Image of the EU", "Middle Part of Speech",
                                          "End Part of Speech", "Government Left-Right position", "Net receipts from EU budget",
                                          "Budget issue","Unanimity Required","Unemployment Rate","Inflation Rate",
                                          "Northern Europe","Southern Europe","Negotiation stage: Initial Presentation",
                                          "Negotiation stage: Mixed Negotiations","Negotiation stage: Policy Debates",
                                          "Council configuration: Ecofin","Council configuration: EPSCO",
                                          "Council configuration: ENV","Council configuration: JHA",
                                          "Eurosceptic Government x Public Image of the EU"),
                     coef = list(mod_top_12$tables[[1]][,1],
                                 mod_top_13$tables[[1]][,1],
                                 mod_top_22$tables[[1]][,1],
                                 mod_top_32$tables[[1]][,1],
                                 mod_top_38$tables[[1]][,1],
                                 mod_top_40$tables[[1]][,1]),
                     se = list(mod_top_12$tables[[1]][,2],
                               mod_top_13$tables[[1]][,2],
                               mod_top_22$tables[[1]][,2],
                               mod_top_32$tables[[1]][,2],
                               mod_top_38$tables[[1]][,2],
                               mod_top_40$tables[[1]][,2]),
                     omit.stat = c("f","ll","rsq","adj.rsq","ser"),
                     intercept.bottom = FALSE)

# The calculations below take about 1 hour 45 minutes.

#### ...Figure 4: Regression Interactions ####

if(run_regression_interactions == "yes"){
  #### Regression Topic 12 ###
  tab_12 <- calc_interaction_regression_stm(var_1 = "image_lag6m_scaled",
                                        var_mod = "gov_eu_supporter",
                                        topic = 12,
                                        estimate_object = prep_council_40_save)
  
  #### Regression Topic 13 ###
  tab_13 <- calc_interaction_regression_stm(var_1 = "image_lag6m_scaled",
                                        var_mod = "gov_eu_supporter",
                                        topic = 13,
                                        estimate_object = prep_council_40_save)
  
  #### Regression Topic 22 ###
  tab_22 <- calc_interaction_regression_stm(var_1 = "image_lag6m_scaled",
                                        var_mod = "gov_eu_supporter",
                                        topic = 22,
                                        estimate_object = prep_council_40_save)
  
  #### Regression Topic 32 ###
  tab_32 <- calc_interaction_regression_stm(var_1 = "image_lag6m_scaled",
                                        var_mod = "gov_eu_supporter",
                                        topic = 32,
                                        estimate_object = prep_council_40_save)
  
  #### Regression Topic 38 ###
  tab_38 <- calc_interaction_regression_stm(var_1 = "image_lag6m_scaled",
                                        var_mod = "gov_eu_supporter",
                                        topic = 38,
                                        estimate_object = prep_council_40_save)
  
  #### Regression Topic 40 ###
  tab_40 <- calc_interaction_regression_stm(var_1 = "image_lag6m_scaled",
                                        var_mod = "gov_eu_supporter",
                                        topic = 40,
                                        estimate_object = prep_council_40_save)
  
  results_all <- bind_rows(tab_12,tab_13,tab_22,
                           tab_32,tab_38,tab_40) %>% 
    mutate(Topic_name = c(rep("Delaying agreement",2),
                          rep("Formulating a demand",2),
                          rep("Supporting the compromise",2),
                          rep("More technical-level discussion needed",2),
                          rep("Cautious language",2),
                          rep("Raising a concern",2))) %>% 
    mutate(upper = Estimate + 1.96*Std.Error,
           lower = Estimate - 1.96*Std.Error)
  
  results_all$moderator_level <- results_all$moderator_level %>% 
    recode("baseline" = "Pro-EU Government",
           "gov_eu_supporterEurosceptic Government:image_lag6m_scaled" = "Eurosceptic Government")
  
  save(file="generated_data/regression_results_main_interaction.RData",results_all)
}else{
  print("Loading saved regression interaction")
  load(file="generated_data/regression_results_main_interaction.RData")
}

# This is Table B1
xtable::xtable(results_all %>% select(-upper,-lower) %>% dplyr::rename('Government Ideology' = "moderator_level",
                                                                'Topic Label' = "Topic_name",
                                                                't-value' = "tval",
                                                                'p-value' = "p"),digits=5) %>% 
  print(include.rownames=FALSE)

europhile_forest <- results_all %>% 
  dplyr::rename("term" = "Topic_name",
         "model" = "moderator_level",
         "estimate" = "Estimate",
         "std.error" = "Std.Error") %>% 
  mutate(feature = case_when(
    term %in% c("Supporting the compromise") ~ "Hypothesis 2",
    term %in% c("Delaying agreement","More technical-level discussion needed","Cautious language") ~ "Hypothesis 3",
    term %in% c("Formulating a demand","Raising a concern") ~ "Hypothesis 4"
  )) %>% 
  mutate(feature = as.factor(feature)) %>% 
  filter(model=="Pro-EU Government") %>% 
  ggplot(aes(x=estimate,xmin=lower,xmax=upper,y=term,color=model)) +
  geom_point()+
  geom_errorbarh(height=0)+ 
  facet_grid(feature~model,scales = "free_y",space = "free")+
  theme_bw()+
  geom_vline(xintercept=0,lty="dashed")+
  scale_color_manual(values = c("black","black"))+
  xlab("Effect of public image on expected topic proportion") + 
  ylab("") +
  ggtitle("") + 
  theme(plot.title = element_text(face="bold"),
        legend.position = "none",
        axis.text=element_text(colour="black"),
        axis.title=element_text(size=8,colour="black"),
        strip.background = element_rect(fill = "white", colour = NA),
        strip.text.x = element_text(face="bold"),
        strip.text.y = element_text(angle = 0, face="bold"),
        axis.ticks.y = element_blank(),
        panel.background = element_blank(), 
        panel.spacing.x = unit(0,"line"))

eurosceptic_forest <- results_all %>% 
  dplyr::rename("term" = "Topic_name",
         "model" = "moderator_level",
         "estimate" = "Estimate",
         "std.error" = "Std.Error") %>% 
  mutate(feature = case_when(
    term %in% c("Supporting the compromise") ~ "Hypothesis 2",
    term %in% c("Delaying agreement","More technical-level discussion needed","Cautious language") ~ "Hypothesis 3",
    term %in% c("Formulating a demand","Raising a concern") ~ "Hypothesis 4"
  )) %>% 
  mutate(feature = as.factor(feature)) %>% 
  filter(model=="Eurosceptic Government") %>% 
  ggplot(aes(x=estimate,xmin=lower,xmax=upper,y=term,color=model)) +
  geom_point()+
  geom_errorbarh(height=0)+ 
  facet_grid(feature~model,scales = "free_y",space = "free")+
  theme_bw()+
  geom_vline(xintercept=0,lty="dashed")+
  scale_color_manual(values = c("black","black"))+
  xlab("Effect of public image on expected topic proportion") + 
  ylab("") +
  ggtitle("") + 
  theme(plot.title = element_text(face="bold"),
        legend.position = "none",
        axis.text=element_text(colour="black"),
        axis.title=element_text(size=8,colour="black"),
        strip.background = element_rect(fill = "white", colour = NA),
        strip.text.x = element_text(face="bold"),
        strip.text.y = element_text(angle = 0, face="bold"),
        axis.ticks.y = element_blank(),
        panel.background = element_blank(), 
        panel.spacing.x = unit(0,"line"))

# This plots Figure 4
ggpubr::ggarrange(eurosceptic_forest,europhile_forest,ncol=1)
ggsave("figures_main_paper/figure_4.eps", width = 6, height = 6, units = "in")
  
tiff("figures_main_paper/figure_4.tiff", units="in", width=6, height=6, res=1000)
ggpubr::ggarrange(eurosceptic_forest,europhile_forest,ncol=1)
endoffile <- dev.off() 
  
#### ...Save most strongly connected words (prob and frex) ####

## Frex words
main_model_all_words_20 <- stm::labelTopics(stm_out_40_main_model,n = 20)
main_model_all_words_20_frex <- cbind(data.frame(Topic=paste("Topic",1:40)),main_model_all_words_20[["frex"]])

for(i in 1:nrow(main_model_all_words_20_frex)){
  main_model_all_words_20_frex$combined[i] <-paste(main_model_all_words_20_frex[i,2:21],collapse=", ")
}

# This is Table I2
main_model_all_words_20_frex %>% 
  select(Topic,combined) %>% 
  xtable::xtable() %>% 
  print(include.rownames=FALSE)

write.csv(file="generated_data/frex_words_40_main_model.csv",main_model_all_words_20_frex)

main_model_all_words_20 <- stm::labelTopics(stm_out_40_main_model,n = 20)
main_model_all_words_20_prob <- cbind(data.frame(Topic=paste("Topic",1:40)),main_model_all_words_20[["prob"]])

for(i in 1:nrow(main_model_all_words_20_prob)){
  main_model_all_words_20_prob$combined[i] <-paste(main_model_all_words_20_prob[i,2:21],collapse=", ")
}

# This is Table I3
main_model_all_words_20_prob %>% 
  select(Topic,combined) %>% 
  xtable::xtable() %>% 
  print(include.rownames=FALSE)

# Test: Is there an additional citizen-related topic?
main_model_all_words_20_frex %>% 
  filter(grepl("vote|citizen|public",combined))

# Test: Is there an additional citizen-related topic?
main_model_all_words_20_prob %>% 
  filter(grepl("vote|citizen|public",combined))


#### ...Save most strongly connected texts ####

saved_texts_40_main_model <- data.frame(matrix(ncol=10,nrow=40))
names(saved_texts_40_main_model) <- c("Text1","Text2","Text3","Text4","Text5",
                                      "Text6","Text7","Text8","Text9","Text10")

for(i in 1:nrow(saved_texts_40_main_model)){
  thoughts_save <- findThoughts(stm_out_40_main_model, texts = docvars(data_dfm_main)$text_original, n = 20,topics = i)$docs[[1]]
  saved_texts_40_main_model[i,] <- thoughts_save
}
saved_texts_40_main_model$Topic = paste("Topic",1:40)

saved_texts_table <- saved_texts_40_main_model %>% 
  select(Topic,Text1,Text2,Text3,Text4,Text5,Text6,Text7,Text8,Text9,Text10) %>% 
  pivot_longer(cols=starts_with("Text")) %>% 
  filter(Topic %in% c("Topic 12","Topic 13","Topic 22","Topic 32","Topic 38","Topic 40"))

saved_texts_table$Topic<- saved_texts_table$Topic %>% 
  recode("Topic 12" = "Topic 12: Delaying agreement",
         "Topic 13" = "Topic 13: Formulating a demand",
         "Topic 22" = "Topic 22: Supporting the compromise",
         "Topic 32" = "Topic 32: More technical-level discussion needed",
         "Topic 38" = "Topic 38: Cautious language",
         "Topic 40" = "Topic 40: Raising a concern"
  )

# This is Table I4
saved_texts_table %>% 
  select(Topic,value) %>% 
  xtable::xtable() %>% 
  print(include.rownames=FALSE)

####  ...Gamma Country and Configuration ####
td_gamma_countries <- tidy(stm_out_40_main_model, matrix = "gamma",                    
                           document_names = docvars(data_dfm_main,"Actor"))

td_gamma_countries <- td_gamma_countries %>% 
  mutate(topic = paste("Topic",topic)) %>% 
  mutate(category = case_when(
    topic %in% c("Topic 12","Topic 13","Topic 22","Topic 32",
                 "Topic 38","Topic 40") ~ "Procedural",
    TRUE ~ "Policy-specific"
  ))

# This is Figure I2
p.i2 <- aggregate(gamma~topic+document+category,td_gamma_countries %>% 
            filter(topic %in% c("Topic 12","Topic 13","Topic 22",
                                "Topic 32","Topic 38","Topic 40",
                                "Topic 7","Topic 9","Topic 11",
                                "Topic 21","Topic 33")) ,mean) %>% 
  mutate(topic = factor(topic,levels = c("Topic 12","Topic 13","Topic 22",
                                         "Topic 32","Topic 38","Topic 40",
                                         "Topic 7","Topic 9","Topic 11",
                                         "Topic 21","Topic 33"))) %>% 
  ggplot(aes(gamma, fill = as.factor(category))) +
  geom_histogram() +
  facet_wrap(~ topic, ncol = 3) +
  theme_minimal()+
  geom_vline(xintercept=0,lty="dashed")+
  labs(y = "Number of Countries", x = "gamma")+#
  theme(legend.position = c(0.8,0.1),
        legend.title = element_blank())

ggsave(plot = p.i2, "figures_appendix/figure_i_2.eps", width = 4.5, height = 6, units = "in")

td_gamma_council <- tidy(stm_out_40_main_model, matrix = "gamma",                    
                         document_names = docvars(data_dfm_main,"Council_Config_final"))

td_gamma_council <- td_gamma_council %>% 
  mutate(topic = paste("Topic",topic)) %>% 
  mutate(category = case_when(
    topic %in% c("Topic 12","Topic 13","Topic 22","Topic 32",
                 "Topic 38","Topic 40") ~ "Procedural",
    TRUE ~ "Policy-specific"
  ))

# This is Figure I1
p.i1 <- aggregate(gamma~topic+document+category,td_gamma_council %>% 
            filter(topic %in% c("Topic 12","Topic 13","Topic 22",
                                "Topic 32","Topic 38","Topic 40",
                                "Topic 7","Topic 9","Topic 11",
                                "Topic 21","Topic 33")) ,mean) %>% 
  mutate(topic = factor(topic,levels = c("Topic 12","Topic 13","Topic 22",
                                         "Topic 32","Topic 38","Topic 40",
                                         "Topic 7","Topic 9","Topic 11",
                                         "Topic 21","Topic 33"))) %>% 
  ggplot(aes(gamma, fill = as.factor(category))) +
  geom_histogram() +#
  facet_wrap(~ topic, ncol = 3) +
  theme_minimal()+
  geom_vline(xintercept=0,lty="dashed")+
  labs(y = "Number of Council Configurations", x = "gamma")+
  theme(legend.position = c(0.8,0.1),
        legend.title = element_blank())
ggsave(plot = p.i1,"figures_appendix/figure_i_1.eps", width = 4.5, height = 6, units = "in")

####  ...Over Time Plot ####

td_gamma_to_join <- td_gamma %>%
  mutate(topic = paste0("Topic ", topic))

table(data.sum$month)
data.sum$month <- format(data.sum$date_correct,"%m")
data.sum <- data.sum %>% 
  mutate(quarter = case_when(
    data.sum$month%in%c("01","02","03") ~ "02",
    data.sum$month%in%c("04","05","06") ~ "05",
    data.sum$month%in%c("07","08","09") ~ "08",
    data.sum$month%in%c("10","11","12") ~ "11",
  ),
  half_year = case_when(
    data.sum$month%in%c("01","02","03","04","05","06") ~ "01",
    data.sum$month%in%c("07","08","09","10","11","12") ~ "07",
  ))
data.sum$year <- format(data.sum$date_correct,"%Y")

table(data.sum$half_year)

data.sum <- data.sum %>% 
  cbind(td_gamma_to_join %>% 
          filter(topic=="Topic 7") %>% 
          select(gamma) %>% 
          dplyr::rename("gamma_topic7" = "gamma"),
        td_gamma_to_join %>% 
          filter(topic=="Topic 9") %>% 
          select(gamma) %>% 
          dplyr::rename("gamma_topic9" = "gamma"),
        td_gamma_to_join %>% 
          filter(topic=="Topic 11") %>% 
          select(gamma) %>% 
          dplyr::rename("gamma_topic11" = "gamma"),
        td_gamma_to_join %>% 
          filter(topic=="Topic 21") %>% 
          select(gamma) %>% 
          dplyr::rename("gamma_topic21" = "gamma"),
        td_gamma_to_join %>% 
          filter(topic=="Topic 33") %>% 
          select(gamma) %>% 
          dplyr::rename("gamma_topic33" = "gamma")) 


agg<- aggregate(gamma_topic7~half_year+year+Council_Config_final,data.sum,mean) %>% 
  left_join(aggregate(gamma_topic9~half_year+year+Council_Config_final,data.sum,mean)) %>% 
  left_join(aggregate(gamma_topic11~half_year+year+Council_Config_final,data.sum,mean)) %>% 
  left_join(aggregate(gamma_topic21~half_year+year+Council_Config_final,data.sum,mean)) %>% 
  left_join(aggregate(gamma_topic33~half_year+year+Council_Config_final,data.sum,mean))

agg$date <- as.Date(paste0("01/",agg$half_year,"/",agg$year),"%d/%m/%Y")

agg$Council_Config_final_short <- agg$Council_Config_final %>% 
  recode("Competitiveness" = "COMPET",
         "Economic and Financial Affairs" = "Ecofin",
         "Justice and Home Affairs" = "JHA",
         "Employment, Social Policy, Health and Consumer Affairs" = "EPSCO",
         "Environment" = "ENV")

agg_print_t7 <- agg %>% select(date,ends_with(paste0("topic",7)),Council_Config_final_short)
agg_print_t7$split <- ifelse(agg_print_t7$Council_Config_final_short=="Justice and Home Affairs","Justice and Home Affairs","All Other Configurations")
agg_print_t7$split <- factor(agg_print_t7$split,levels=c("Justice and Home Affairs","All Other Configurations"))
p.print.7 <- ggplot(agg_print_t7,aes(x=date,y=gamma_topic7))+
  geom_point(aes(shape=Council_Config_final_short))+
  geom_line(aes(lty=Council_Config_final_short))+
  theme_minimal()+
  expand_limits(y=0)+
  #  facet_wrap(~split)+
  labs(x="",y="Topic Prevalence",title="Topic 7: European \njudicial matters / \nEuropean public \nprosecutor")+
  theme(legend.title = element_blank(),
        title=element_text(size=8,face = "bold"))

agg_print_t9 <- agg %>% select(date,ends_with(paste0("topic",9)),Council_Config_final_short)
agg_print_t9$split <- ifelse(agg_print_t9$Council_Config_final_short=="Ecofin","Ecofin","All Other Configurations")
agg_print_t9$split <- factor(agg_print_t9$split,levels=c("Ecofin","All Other Configurations"))
p.print.9 <- ggplot(agg_print_t9,aes(x=date,y=gamma_topic9))+
  geom_point(aes(shape=Council_Config_final_short))+
  geom_line(aes(lty=Council_Config_final_short))+
  theme_minimal()+
  expand_limits(y=0)+
  # facet_wrap(~split)+
  labs(x="",y="Topic Prevalence",title="Topic 9: Banking \nsupervision")+
  theme(legend.title = element_blank(),
        title=element_text(size=8,face = "bold"))

agg_print_t11 <- agg %>% select(date,ends_with(paste0("topic",11)),Council_Config_final_short)
agg_print_t11$split <- ifelse(agg_print_t11$Council_Config_final_short=="ENV","ENV","All Other Configurations")
agg_print_t11$split <- factor(agg_print_t11$split,levels=c("ENV","All Other Configurations"))
p.print.11 <- ggplot(agg_print_t11,aes(x=date,y=gamma_topic11))+
  geom_point(aes(shape=Council_Config_final_short))+
  geom_line(aes(lty=Council_Config_final_short))+
  theme_minimal()+
  expand_limits(y=0)+
  # facet_wrap(~split)+
  labs(x="",y="Topic Prevalence",title="Topic 11: Renewable energy \nand climate")+
  theme(legend.title = element_blank(),
        title=element_text(size=8,face = "bold"))

agg_print_t21 <- agg %>% select(date,ends_with(paste0("topic",21)),Council_Config_final_short)
agg_print_t21$split <- ifelse(agg_print_t21$Council_Config_final_short=="EPSCO","EPSCO","All Other Configurations")
agg_print_t21$split <- factor(agg_print_t21$split,levels=c("EPSCO","All Other Configurations"))
p.print.21 <- ggplot(agg_print_t21,aes(x=date,y=gamma_topic21))+
  geom_point(aes(shape=Council_Config_final_short))+
  geom_line(aes(lty=Council_Config_final_short))+
  theme_minimal()+
  expand_limits(y=0)+
  #  facet_wrap(~split)+
  labs(x="",y="Topic Prevalence",title="Topic 21: Health and medical devices")+
  theme(legend.title = element_blank(),
        title=element_text(size=8,face = "bold"))

agg_print_t33 <- agg %>% select(date,ends_with(paste0("topic",33)),Council_Config_final_short)
agg_print_t33$split <- ifelse(agg_print_t33$Council_Config_final_short=="COMPET","COMPET","All Other Configurations")
agg_print_t33$split <- factor(agg_print_t33$split,levels=c("COMPET","All Other Configurations"))
p.print.33 <- ggplot(agg_print_t33,aes(x=date,y=gamma_topic33))+
  geom_point(aes(shape=Council_Config_final_short))+
  geom_line(aes(lty=Council_Config_final_short))+
  theme_minimal()+
  expand_limits(y=0)+
  #  facet_wrap(~split)+
  labs(x="",y="Topic Prevalence",title="Topic 33: Research and \ndevelopment \n(e.g. Horizon 2020)")+
  theme(legend.title = element_blank(),
        title=element_text(size=8,face = "bold"))

legend <- get_legend(p.print.33)

# This is Figure 3
p.i3 <-gridExtra::grid.arrange(p.print.7 + theme(legend.position="none"),
                        p.print.9 + theme(legend.position="none"),
                        p.print.11 + theme(legend.position="none"),
                        p.print.21 + theme(legend.position="none"),
                        p.print.33 + theme(legend.position="none"),legend,ncol=2)
ggsave(plot = p.i3,"figures_appendix/figure_i_3.eps", width = 6, height = 6, units = "in")

#### ...Correlations between Topics ####

main_topic_cor <- topicCorr(stm_out_40_main_model)$cor %>% as.data.frame()
main_topic_cor_frame <- reshape::melt(main_topic_cor)
main_topic_cor_frame$cor_topic <- 1:40
main_topic_cor_frame$variable <- gsub("V","",main_topic_cor_frame$variable) %>% as.numeric()

main_topic_cor_frame$variable_a_name<- main_topic_cor_frame$variable %>% 
  recode("1"  = "Topic 1: Thanking I (rather specific person)",
         "2"  = "Topic 2: Exchange of information",
         "3"  = "Topic 3: Internal market",
         "4"  = "Topic 4: Ships",
         "5"  = "Topic 5: Reflection",
         "6"  = "Topic 6: Thanking II (rather abstract groups)",
         "7"  = "Topic 7: European judicial matters / \nEuropean public prosecutor",
         "8"  = "Topic 8: Referring to Commissioner",
         "9"  = "Topic 9: Banking supervision",
         "10" = "Topic 10: Burdens of implementation \n(esp. environment)",
         "11" = "Topic 11: Renewable energy and climate",
         "12" = "Topic 12: Delaying agreement",
         "13" = "Topic 13: Formulating a demand",
         "14" = "Topic 14: Invasive species",
         "15" = "Topic 15: GMOs",
         "16" = "Topic 16: Crime",
         "17" = "Topic 17: Budget",
         "18" = "Topic 18: Talking about legal text",
         "19" = "Topic 19: International money laundering",
         "20" = "Topic 20: Governance of four freedoms \n(e.g. capital, workers)",
         "21" = "Topic 21: Health and medical devices",
         "22" = "Topic 22: Supporting the compromise",
         "23" = "Topic 23: Congratulating",
         "24" = "Topic 24: Legal harmonization",
         "25" = "Topic 25: Cooperation between member states",
         "26" = "Topic 26: Air emissions and pollutants",
         "27" = "Topic 27: Tax evasion and fraud",
         "28" = "Topic 28: Banking union (esp. \nresolution and deposit insurance)",
         "29" = "Topic 29: Data protection",
         "30" = "Topic 30: Public procurement and tobacco",
         "31" = "Topic 31: Financial crisis",
         "32" = "Topic 32: More technical-level discussion needed",
         "33" = "Topic 33: Research and development \n(e.g. Horizon 2020)",
         "34" = "Topic 34: Audit and control",
         "35" = "Topic 35: Talking about reaching compromise",
         "36" = "Topic 36: Negotiations with EP",
         "37" = "Topic 37: Brief intervention",
         "38" = "Topic 38: Cautious language",
         "39" = "Topic 39: Affirmation",
         "40" = "Topic 40: Raising a concern"
  )
main_topic_cor_frame <- main_topic_cor_frame %>% 
  mutate(Type_a = case_when(
    variable %in% c("1","5","6",
                    "8","12",
                    "13","18",
                    "22","23","24","25",
                    "32","35",
                    "36","37","38",
                    "39","40") ~ "Procedural Topic",
    TRUE ~ "Substantial Topic"
    
  ))
main_topic_cor_frame$variable_b_name<- main_topic_cor_frame$cor_topic %>% 
  recode("1"  = "Topic 1: Thanking I (rather specific person)",
         "2"  = "Topic 2: Exchange of information",
         "3"  = "Topic 3: Internal market",
         "4"  = "Topic 4: Ships",
         "5"  = "Topic 5: Reflection",
         "6"  = "Topic 6: Thanking II (rather abstract groups)",
         "7"  = "Topic 7: European judicial matters / \nEuropean public prosecutor",
         "8"  = "Topic 8: Referring to Commissioner",
         "9"  = "Topic 9: Banking supervision",
         "10" = "Topic 10: Burdens of implementation \n(esp. environment)",
         "11" = "Topic 11: Renewable energy and climate",
         "12" = "Topic 12: Delaying agreement",
         "13" = "Topic 13: Formulating a demand",
         "14" = "Topic 14: Invasive species",
         "15" = "Topic 15: GMOs",
         "16" = "Topic 16: Crime",
         "17" = "Topic 17: Budget",
         "18" = "Topic 18: Talking about legal text",
         "19" = "Topic 19: International money laundering",
         "20" = "Topic 20: Governance of four freedoms \n(e.g. capital, workers)",
         "21" = "Topic 21: Health and medical devices",
         "22" = "Topic 22: Supporting the compromise",
         "23" = "Topic 23: Congratulating",
         "24" = "Topic 24: Legal harmonization",
         "25" = "Topic 25: Cooperation between member states",
         "26" = "Topic 26: Air emissions and pollutants",
         "27" = "Topic 27: Tax evasion and fraud",
         "28" = "Topic 28: Banking union (esp. \nresolution and deposit insurance)",
         "29" = "Topic 29: Data protection",
         "30" = "Topic 30: Public procurement and tobacco",
         "31" = "Topic 31: Financial crisis",
         "32" = "Topic 32: More technical-level discussion needed",
         "33" = "Topic 33: Research and development \n(e.g. Horizon 2020)",
         "34" = "Topic 34: Audit and control",
         "35" = "Topic 35: Talking about reaching compromise",
         "36" = "Topic 36: Negotiations with EP",
         "37" = "Topic 37: Brief intervention",
         "38" = "Topic 38: Cautious language",
         "39" = "Topic 39: Affirmation",
         "40" = "Topic 40: Raising a concern"
  )

main_topic_cor_frame <- main_topic_cor_frame %>% 
  mutate(Type_b = case_when(
    cor_topic %in% c("1","5","6",
                     "8","12",
                     "13","18",
                     "22","23","24","25",
                     "32","35",
                     "36","37","38",
                     "39","40") ~ "Procedural Topic",
    TRUE ~ "Substantial Topic"
  ))

# This is Figure I4
main_topic_cor_frame %>% 
  filter(value<1) %>% 
  ggplot(aes(x=reorder(as.character(variable),variable), y=reorder(cor_topic,cor_topic), fill=value)) + 
  geom_tile(color = "white")+
  labs(x="Topic in Baseline Model",y="Topic in Baseline Model")+
  scale_fill_gradient2(low = "blue", high = "red", 
                       midpoint = median(main_topic_cor_frame$value), 
                       limit = c(main_topic_cor_frame %>% 
                                   filter(value<1) %>% 
                                   select(value) %>% 
                                   min(),
                                 main_topic_cor_frame %>% 
                                   filter(value<1) %>% 
                                   select(value) %>% 
                                   max()), 
                       space = "Lab", 
                       name="Correlation") +
  theme_bw()+
  coord_fixed()+
  theme(axis.text.y = element_text(size=8),
        axis.text.x = element_text(size=8),
        axis.title = element_text(size=12))
ggsave("figures_appendix/figure_i_4.eps", width = 7, height = 7, units = "in")

# This is Table I5
main_topic_cor_frame %>% 
  select(value,variable_a_name,variable_b_name,Type_a,Type_b) %>% 
  filter(value<1) %>% 
  filter(!duplicated(value)) %>% 
  arrange(-value) %>% 
  filter(value>=0.30) %>% 
  xtable::xtable() %>% 
  print(include.rownames = FALSE)

#### ...Correlation Explicit and Implicit Mentions ####

## Implicit Mentions

data_dfm_main$identifier <- paste0("document_nr_",1:nrow(data.sum))
td_gamma_corp <- tidy(stm_out_40_main_model, matrix = "gamma",                    
                      document_names = docvars(data_dfm_main,"identifier"))

td_gamma_corp <- td_gamma_corp %>% 
  mutate(topic = paste("Topic",topic)) %>% 
  mutate(category = case_when(
    topic %in% c("Topic 12","Topic 13","Topic 22","Topic 32",
                 "Topic 38","Topic 40") ~ "Key Procedural Topic from Main Analysis",
    topic %in% c("Topic 1","Topic 5","Topic 6",
                 "Topic 8","Topic 18",
                 "Topic 23","Topic 24","Topic 25",
                 "Topic 35",
                 "Topic 36","Topic 37",
                 "Topic 39") ~ "Other Procedural Topic",
    TRUE ~ "Policy-specific"
  ))

## Explicit mentions
dict.public <- dictionary(list(public= c("public*","people*","citizen*","public_opinion",
                                         "voter*","constituent*","inhabitant*",
                                         "resident*","taxpayer*")))

load(file="generated_data/corpus_for_final_analysis.RData")

corpus_orig_text <- corp_seg2 %>% 
  docvars() %>% 
  corpus(text_field = "text_original") 

kwic_coding <- read.csv("data/KWIC APSR Coding.csv")

kwic_coding <- kwic_coding %>% 
  mutate(code_word = case_when(
    code == "1" ~ "Noun, refers to a general, vague public",
    code == "2" ~ "Noun, refers to national/European public",
    code == "3" ~ "Part of set term, such as 'public procurement', 'public prosecutor'",
    code == "4" ~ "Adjective",
    code == "5" ~ "Other"
  ))

kwic_doclevel <- kwic_coding %>% 
  group_by(docname) %>% 
  summarise(coded = n()) %>% 
  left_join(kwic_coding %>% 
              filter(code%in%c(1,2)) %>% 
              group_by(docname) %>% 
              summarise(coded_nouns = n())) 
kwic_doclevel$coded_nouns[is.na(kwic_doclevel$coded_nouns)] <- 0

public.vars <- docvars(corpus_orig_text) %>% 
  cbind(docnames(corpus_orig_text)) %>% 
  mutate(Actor_Transcription = paste(Actor,Transcription,sep="_")) %>% 
  dplyr::rename("docname" = 'docnames(corpus_orig_text)') %>% 
  left_join(kwic_doclevel)

public.vars$coded[is.na(public.vars$coded)] <- 0
public.vars$coded_nouns[is.na(public.vars$coded_nouns)] <- 0

public.vars <- public.vars %>% 
  select(coded,coded_nouns,gov_eu_supporter,image_lag6m_scaled,
         date_correct,Transcription,
         part_of_speech,Actor,
         gov_lr_cmp_static_scaled,eu_receipts_gdp_scaled,budget_any,unanimity_any,
         unemployment_scaled,inflation_scaled,north_south,
         Negotiation_Stage,Council_Config_final)
public.vars <- public.vars %>% filter(complete.cases(public.vars))
  
public.vars <- public.vars %>% 
  mutate(document = paste0("document_nr_",1:nrow(public.vars)))
dat.corr.check.nouns <- td_gamma_corp %>% 
  left_join(public.vars)

# This is Table I6
dat.corr.check.nouns %>% 
  group_by(topic,category) %>% 
  summarise(corrs = cor(gamma,coded)) %>% 
  arrange(-corrs) %>% 
  dplyr::rename('Correlation between Topic Prevalence and Mentions of the Public' = corrs) %>% 
  xtable::xtable() %>% 
  print(include.rownames=FALSE)

# This is Table I7
dat.corr.check.nouns %>% 
  group_by(topic,category) %>% 
  summarise(corrs = cor(gamma,coded_nouns)) %>% 
  arrange(-corrs) %>% 
  dplyr::rename('Correlation between Topic Prevalence and Mentions of the Public' = corrs) %>% 
  xtable::xtable() %>% 
  print(include.rownames=FALSE)

###########################################-
#### ...Plot Predicted Proportions ####
###########################################-

data.ranges <- data.sum %>% 
  group_by(gov_eu_supporter) %>% 
  summarise(min_values = min(image_lag6m_scaled),
            max_values = max(image_lag6m_scaled),
            perc_10 = quantile(image_lag6m_scaled,0.1),
            perc_90 = quantile(image_lag6m_scaled,0.9))

topics_detailed <- data.frame(Topic = c(12,13,22,32,38,40),
                              Topic_name = c("Delaying agreement",
                                             "Formulating a demand",
                                             "Supporting the compromise",
                                             "More technical-level discussion needed",
                                             "Cautious language",
                                             "Raising a concern"))

point_estimates_store <- data.frame()

for (i in topics_detailed$Topic){
  europhile.data <- plot(prep_council_40_save, covariate = "image_lag6m_scaled", model = stm_out_40_main_model,
                         topics = i,
                         method = "continuous", xlab = "Public Image of the EU", moderator = "gov_eu_supporter",
                         moderator.value = "Europhile Government", linecol = "blue",omit.plot = TRUE,
                         printlegend = FALSE)
  europhile.data.frame <- data.frame(x=europhile.data$x,
                                     means=as.numeric(europhile.data$means[[1]]),
                                     conf.low=as.vector(europhile.data$ci[[1]][1,]),
                                     conf.high=as.vector(europhile.data$ci[[1]][2,]),
                                     class = "Europhile Government",
                                     stringsAsFactors = F)
  eurosceptic.data <- plot(prep_council_40_save, covariate = "image_lag6m_scaled", model = stm_out_40_main_model,
                           topics = i,
                           method = "continuous", xlab = "Public Image of the EU", moderator = "gov_eu_supporter",
                           moderator.value = "Eurosceptic Government", linecol = "blue",omit.plot = TRUE,
                           printlegend = FALSE)
  eurosceptic.data.frame <- data.frame(x=eurosceptic.data$x,
                                       means=as.numeric(eurosceptic.data$means[[1]]),
                                       conf.low=as.vector(eurosceptic.data$ci[[1]][1,]),
                                       conf.high=as.vector(eurosceptic.data$ci[[1]][2,]),
                                       class = "Eurosceptic Government",
                                       stringsAsFactors = F)
  
  point_estimates_save <- bind_rows(europhile.data.frame,eurosceptic.data.frame)
  point_estimates_save$topic_number <- i
  point_estimates_save$topic_name <- topics_detailed$Topic_name[topics_detailed$Topic==i]
  
  point_estimates_store <- bind_rows(point_estimates_store,point_estimates_save)
  
  print(i)
}
  
getmode <- function(v) {
  uniqv <- unique(v)
  uniqv[which.max(tabulate(match(v, uniqv)))]
}
median(data.sum$gov_lr_cmp_static_scaled) %>% round(2)
median(data.sum$eu_receipts_gdp_scaled) %>% round(2)
median(data.sum$unemployment_scaled) %>% round(2)
median(data.sum$inflation_scaled) %>% round(2)

getmode(data.sum$part_of_speech)
getmode(data.sum$budget_any)
getmode(data.sum$unanimity_any)
getmode(data.sum$north_south)
getmode(data.sum$Negotiation_Stage)
getmode(data.sum$Council_Config_final)

point_estimates_store <- point_estimates_store %>% 
  filter(class=="Europhile Government" & between(x,
                                                 data.ranges %>% filter(gov_eu_supporter=="Europhile Government") %>% select(min_values) %>% unlist(),
                                                 data.ranges %>% filter(gov_eu_supporter=="Europhile Government") %>% select(max_values) %>% unlist())|
           class=="Eurosceptic Government" & between(x,
                                                     data.ranges %>% filter(gov_eu_supporter=="Eurosceptic Government") %>% select(min_values) %>% unlist(),
                                                     data.ranges %>% filter(gov_eu_supporter=="Eurosceptic Government") %>% select(max_values) %>% unlist()))

point_estimates_store$group <- point_estimates_store$class %>% 
  recode("Europhile Government" = "Pro-EU \nGovernment",
         "Eurosceptic Government" = "Eurosceptic \nGovernment")

point_estimates_store %>% 
  filter(class=="Europhile Government" & between(x,
                                                 data.ranges %>% filter(gov_eu_supporter=="Europhile Government") %>% select(perc_10) %>% unlist(),
                                                 data.ranges %>% filter(gov_eu_supporter=="Europhile Government") %>% select(perc_90) %>% unlist())) %>% 
  filter(topic_name == "Supporting the compromise") %>% 
  group_by(group) %>% 
  summarise(ten_perc = min(means), #Ten percent is now the minimum, because we subset it before to only values between 10th and 90th percentile
            ninety_perc = max(means)) %>% #Ninety percent is now the maximum, because we subset it before to only values between 10th and 90th percentile
  mutate(change=ninety_perc-ten_perc) 

point_estimates_store %>% 
  filter(class=="Europhile Government" & between(x,
                                                 data.ranges %>% filter(gov_eu_supporter=="Europhile Government") %>% select(perc_10) %>% unlist(),
                                                 data.ranges %>% filter(gov_eu_supporter=="Europhile Government") %>% select(perc_90) %>% unlist())) %>% 
  filter(topic_name %in% c("Delaying agreement","More technical-level discussion needed",
                           "Cautious language")) %>% 
  group_by(group,topic_name) %>% 
  slice(c(1,n())) %>% 
  group_by(x,class) %>% 
  summarise(val = sum(means))

point_estimates_store %>% 
  filter(class=="Europhile Government" & between(x,
                                                 data.ranges %>% filter(gov_eu_supporter=="Europhile Government") %>% select(perc_10) %>% unlist(),
                                                 data.ranges %>% filter(gov_eu_supporter=="Europhile Government") %>% select(perc_90) %>% unlist())) %>% 
  filter(topic_name %in% c("Formulating a demand","Raising a concern")) %>% 
  group_by(group,topic_name) %>% 
  slice(c(1,n())) %>% 
  group_by(x,class) %>% 
  summarise(val = sum(means))

point_plot_12 <- ggplot(point_estimates_store %>% filter(topic_number==12),aes(x=x,y=means,ymin=conf.low,ymax=conf.high))+
  geom_smooth(colour="black",se=F)+
  geom_smooth(aes(y=conf.low),se=F,lty="dashed",colour="black")+
  geom_smooth(aes(y=conf.high),se=F,lty="dashed",colour="black")+
  facet_wrap(~group,scales = "free_x")+
  labs(y="Estimated Topic Proportion",x="Public Image of the EU (z-score)",title="Topic 12: Delaying agreement")+
  theme_bw()+
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        strip.background = element_rect(fill = "white", colour = NA),
        title = element_text(colour="black",size=10),
        axis.text = element_text(colour = "black"),
        axis.title = element_text(colour = "black"),
        legend.text = element_text(colour = "black"),
        strip.text = element_text(colour = "black",face = "bold"))

point_plot_13 <- ggplot(point_estimates_store %>% filter(topic_number==13),aes(x=x,y=means,ymin=conf.low,ymax=conf.high))+
  geom_smooth(colour="black",se=F)+
  geom_smooth(aes(y=conf.low),se=F,lty="dashed",colour="black")+
  geom_smooth(aes(y=conf.high),se=F,lty="dashed",colour="black")+
  facet_wrap(~group,scales = "free_x")+
  labs(y="Estimated Topic Proportion",x="Public Image of the EU (z-score)",title="Topic 13: Formulating a demand")+
  theme_bw()+
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        strip.background = element_rect(fill = "white", colour = NA),
        title = element_text(colour="black",size=10),
        axis.text = element_text(colour = "black"),
        axis.title = element_text(colour = "black"),
        legend.text = element_text(colour = "black"),
        strip.text = element_text(colour = "black",face = "bold"))

point_plot_22 <- ggplot(point_estimates_store %>% filter(topic_number==22),aes(x=x,y=means,ymin=conf.low,ymax=conf.high))+
  geom_smooth(colour="black",se=F)+
  geom_smooth(aes(y=conf.low),se=F,lty="dashed",colour="black")+
  geom_smooth(aes(y=conf.high),se=F,lty="dashed",colour="black")+
  facet_wrap(~group,scales = "free_x")+
  labs(y="Estimated Topic Proportion",x="Public Image of the EU (z-score)",title="Topic 22: Supporting the compromise")+
  theme_bw()+
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        strip.background = element_rect(fill = "white", colour = NA),
        title = element_text(colour="black",size=10),
        axis.text = element_text(colour = "black"),
        axis.title = element_text(colour = "black"),
        legend.text = element_text(colour = "black"),
        strip.text = element_text(colour = "black",face = "bold"))

point_plot_32 <- ggplot(point_estimates_store %>% filter(topic_number==32),aes(x=x,y=means,ymin=conf.low,ymax=conf.high))+
  geom_smooth(colour="black",se=F)+
  geom_smooth(aes(y=conf.low),se=F,lty="dashed",colour="black")+
  geom_smooth(aes(y=conf.high),se=F,lty="dashed",colour="black")+
  facet_wrap(~group,scales = "free_x")+
  labs(y="Estimated Topic Proportion",x="Public Image of the EU (z-score)",title="Topic 32: More technical-level \ndiscussion needed")+
  theme_bw()+
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        strip.background = element_rect(fill = "white", colour = NA),
        title = element_text(colour="black",size=10),
        axis.text = element_text(colour = "black"),
        axis.title = element_text(colour = "black"),
        legend.text = element_text(colour = "black"),
        strip.text = element_text(colour = "black",face = "bold"))

point_plot_38 <- ggplot(point_estimates_store %>% filter(topic_number==38),aes(x=x,y=means,ymin=conf.low,ymax=conf.high))+
  geom_smooth(colour="black",se=F)+
  geom_smooth(aes(y=conf.low),se=F,lty="dashed",colour="black")+
  geom_smooth(aes(y=conf.high),se=F,lty="dashed",colour="black")+
  facet_wrap(~group,scales = "free_x")+
  labs(y="Estimated Topic Proportion",x="Public Image of the EU (z-score)",title="Topic 38: Cautious language")+
  theme_bw()+
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        strip.background = element_rect(fill = "white", colour = NA),
        title = element_text(colour="black",size=10),
        axis.text = element_text(colour = "black"),
        axis.title = element_text(colour = "black"),
        legend.text = element_text(colour = "black"),
        strip.text = element_text(colour = "black",face = "bold"))

point_plot_40 <- ggplot(point_estimates_store %>% filter(topic_number==40),aes(x=x,y=means,ymin=conf.low,ymax=conf.high))+
  geom_smooth(colour="black",se=F)+
  geom_smooth(aes(y=conf.low),se=F,lty="dashed",colour="black")+
  geom_smooth(aes(y=conf.high),se=F,lty="dashed",colour="black")+
  facet_wrap(~group,scales = "free_x")+
  labs(y="Estimated Topic Proportion",x="Public Image of the EU (z-score)",title="Topic 40: Raising a concern")+
  theme_bw()+
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        strip.background = element_rect(fill = "white", colour = NA),
        title = element_text(colour="black",size=10),
        axis.text = element_text(colour = "black"),
        axis.title = element_text(colour = "black"),
        legend.text = element_text(colour = "black"),
        strip.text = element_text(colour = "black",face = "bold"))

# This is Figure I6
point_plot_22
ggsave("figures_appendix/figure_i_6.eps", width = 4, height = 3, units = "in")

# This is Figure I7
ggpubr::ggarrange(point_plot_32,point_plot_12,point_plot_38,ncol=2,nrow=2)
ggsave("figures_appendix/figure_i_7.eps", width = 6, height = 6, units = "in")

# This is Figure I8
ggpubr::ggarrange(point_plot_40,point_plot_13,ncol=2,nrow=1)
ggsave("figures_appendix/figure_i_8.eps", width = 6, height = 3, units = "in")

#### ...Plot direct effect of negotiation stages ####

plot.dat.12 <- plot(prep_council_40_save,covariate="Negotiation_Stage",topics=c(12))
plot.dat.12 <- data.frame(y=plot.dat.12$uvals,
                          means=plot.dat.12$means[[1]],
                          ci.low=plot.dat.12$cis[[1]][1,],
                          ci.high=plot.dat.12$cis[[1]][2,]) %>% 
  ggplot(aes(y=y,x=means,xmin=ci.low,xmax=ci.high))+
  geom_point()+
  geom_errorbarh(height=0)+
  labs(y="",x="Estimated Topic Proportion",
       title="Topic 12: Delaying agreement")+
  theme_bw()+
  theme(plot.title = element_text(size=10))

plot.dat.13 <- plot(prep_council_40_save,covariate="Negotiation_Stage",topics=c(13))
plot.dat.13 <- data.frame(y=plot.dat.13$uvals,
                          means=plot.dat.13$means[[1]],
                          ci.low=plot.dat.13$cis[[1]][1,],
                          ci.high=plot.dat.13$cis[[1]][2,]) %>% 
  ggplot(aes(y=y,x=means,xmin=ci.low,xmax=ci.high))+
  geom_point()+
  geom_errorbarh(height=0)+
  labs(y="",x="Estimated Topic Proportion",
       title="Topic 13: Formulating a demand")+
  theme_bw()+
  theme(plot.title = element_text(size=10))

plot.dat.22 <- plot(prep_council_40_save,covariate="Negotiation_Stage",topics=c(22))
plot.dat.22 <- data.frame(y=plot.dat.22$uvals,
                          means=plot.dat.22$means[[1]],
                          ci.low=plot.dat.22$cis[[1]][1,],
                          ci.high=plot.dat.22$cis[[1]][2,]) %>% 
  ggplot(aes(y=y,x=means,xmin=ci.low,xmax=ci.high))+
  geom_point()+
  geom_errorbarh(height=0)+
  labs(y="",x="Estimated Topic Proportion",
       title="Topic 22: Supporting the compromise")+
  theme_bw()+
  theme(plot.title = element_text(size=10))

plot.dat.32 <- plot(prep_council_40_save,covariate="Negotiation_Stage",topics=c(32))
plot.dat.32 <- data.frame(y=plot.dat.32$uvals,
                          means=plot.dat.32$means[[1]],
                          ci.low=plot.dat.32$cis[[1]][1,],
                          ci.high=plot.dat.32$cis[[1]][2,]) %>% 
  ggplot(aes(y=y,x=means,xmin=ci.low,xmax=ci.high))+
  geom_point()+
  geom_errorbarh(height=0)+
  labs(y="",x="Estimated Topic Proportion",
       title="Topic 32: More technical-level discussion needed")+
  theme_bw()+
  theme(plot.title = element_text(size=10))

plot.dat.38 <- plot(prep_council_40_save,covariate="Negotiation_Stage",topics=c(38))
plot.dat.38 <- data.frame(y=plot.dat.38$uvals,
                          means=plot.dat.38$means[[1]],
                          ci.low=plot.dat.38$cis[[1]][1,],
                          ci.high=plot.dat.38$cis[[1]][2,]) %>% 
  ggplot(aes(y=y,x=means,xmin=ci.low,xmax=ci.high))+
  geom_point()+
  geom_errorbarh(height=0)+
  labs(y="",x="Estimated Topic Proportion",
       title="Topic 38: Cautious language")+
  theme_bw()+
  theme(plot.title = element_text(size=10))

plot.dat.40 <- plot(prep_council_40_save,covariate="Negotiation_Stage",topics=c(40))
plot.dat.40 <- data.frame(y=plot.dat.40$uvals,
                          means=plot.dat.40$means[[1]],
                          ci.low=plot.dat.40$cis[[1]][1,],
                          ci.high=plot.dat.40$cis[[1]][2,]) %>% 
  ggplot(aes(y=y,x=means,xmin=ci.low,xmax=ci.high))+
  geom_point()+
  geom_errorbarh(height=0)+
  labs(y="",x="Estimated Topic Proportion",
       title="Topic 40: Raising a concern")+
  theme_bw()+
  theme(plot.title = element_text(size=10))

# This is Figure K4
ggpubr::ggarrange(plot.dat.12,plot.dat.13,plot.dat.22,
                  plot.dat.32,plot.dat.38,plot.dat.40,ncol=1,nrow=6)
ggsave("figures_appendix/figure_k_4.eps", width = 5, height = 7.5, units = "in")
